import pyaudio
import wave
import rospy
from audio_common_msgs.msg import AudioData
from std_msgs.msg import String
import os,sys
import _pickle as cPickle
import numpy as np
from scipy.io.wavfile import read
from gmm2_code.speakerfeatures import extract_features
import warnings
import datetime,time
import shutil

warnings.filterwarnings("ignore")



class MyAudio(object):
    """Capture ~5 s of audio from the /audio ROS topic, identify the speaker
    with pre-trained GMM models, publish the result on 'recognition' and
    archive the recording under audio_wav/<speaker>/.
    """

    def __init__(self):
        self.sample_rate = rospy.get_param('audio_capture/sample_rate')
        self.depth = 1  # rospy.get_param('audio_capture/depth')
        self.channels = rospy.get_param('audio_capture/channels')
        self.pub = rospy.Publisher('recognition', String, queue_size=1, tcp_nodelay=True)
        # Initialise the sample buffer BEFORE registering the subscriber so the
        # callback can never fire against an uninitialised attribute (the old
        # code relied on a mutable class attribute shared by all instances).
        self.audio_data = []
        self.audio_sub = rospy.Subscriber('/audio', AudioData, self.recode_data)
        print('Please begin to speak ,for 5 s')
        self.save_data()
        # TODO: obtain this in real time from the face-recognition node.
        self.face_info = 'linkunling'

    def del_face_name(self):
        """Save the audio under the face-recognition name so that multi-person
        recordings are not mixed into a single speaker's archive.
        """
        # NOTE(review): face_info is currently a plain name string, so this
        # length test only means "more than one detected face" if face_info is
        # really a list -- confirm the upstream face-node message format.
        if len(self.face_info) > 2: print('Not only one people ')
        else:
            self.face_name = 'linkunling'  # TODO: get the name from the face node
            self.deal_face_file()

    def record_to_file(self, data, wav_name):
        """Write the captured samples to `wav_name` as 16-bit PCM WAV.

        `data` is a numpy array of uint16 samples (the raw little-endian bit
        pattern of the signed PCM stream reassembled in recode_data).
        """
        wf = wave.open(wav_name, 'wb')
        try:
            wf.setnchannels(self.channels)
            wf.setsampwidth(2)  # 16-bit samples
            wf.setframerate(self.sample_rate)
            # tobytes() emits the whole sample stream in one C-level call;
            # b''.join() over individual numpy scalars was needlessly slow.
            wf.writeframes(np.asarray(data, dtype=np.uint16).tobytes())
        finally:
            wf.close()

    def recode_data(self, msg):
        """ROS callback: reassemble 160 little-endian 16-bit samples from the
        byte payload in msg.data and append them to the capture buffer.
        """
        # 160 samples == 320 bytes; presumably the fixed chunk size published
        # by audio_capture -- TODO confirm against the publisher's config.
        for j in range(0, 320, 2):
            sample = (msg.data[j] & 0xff) | ((msg.data[j + 1] & 0xff) << 8)
            self.audio_data.append(sample)

    def softmax(self, x):
        """Numerically stable softmax.

        Shifting by the maximum avoids exp() overflow/underflow on the
        large-magnitude log-likelihoods produced by GMM scoring (the naive
        form returned nan for inputs like -50000).
        """
        shifted = np.exp(x - np.max(x))
        return shifted / np.sum(shifted, axis=0)

    def task_predict(self, modelpath):
        """Score the recorded WAV against every GMM under `modelpath` and set
        self.pre_speaker to the best match, or 'Nobody' when the softmax
        confidence is at most 0.8.
        """
        gmm_files = [os.path.join(modelpath, fname) for fname in
                     os.listdir(modelpath) if fname.endswith('.gmm')]
        # Load the Gaussian speaker models, closing each file handle
        # (the original cPickle.load(open(...)) leaked them).
        models = []
        for fname in gmm_files:
            with open(fname, 'rb') as model_file:
                models.append(cPickle.load(model_file))
        # Speaker name = file name minus directory and '.gmm' extension.
        # (The old manual split on '\\' and '/' broke on nested paths.)
        speakers = [os.path.splitext(os.path.basename(fname))[0]
                    for fname in gmm_files]

        # Score the freshly recorded utterance against each model.
        sr, audio = read(self.wav_name)
        vector = extract_features(audio, sr)
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):
            log_likelihood[i] = np.array(gmm.score(vector)).sum()
        winner = np.argmax(log_likelihood)
        score = self.softmax(log_likelihood)
        if max(score) > 0.8:
            self.pre_speaker = speakers[winner]
        else:
            self.pre_speaker = 'Nobody'

    def predict(self):
        """Run speaker identification on the saved WAV and publish the result."""
        modelpath = "gmm2_models/"
        self.task_predict(modelpath)
        output_str = "bear  %s" % (self.pre_speaker)
        print(output_str)
        self.pub.publish(output_str)

    def deal_file(self):
        """Archive the recorded WAV: unknown speakers get a fresh hostN folder,
        known speakers are filed into their existing audio_wav/<name>/ folder.
        """
        len_speaker = len(os.listdir('audio_wav'))
        if self.pre_speaker == 'Nobody':
            wav_time_name = self.wav_name.split('_')[1]  # '<timestamp>.wav'
            new_wav_name = 'host' + str(len_speaker + 1) + wav_time_name
            os.rename(self.wav_name, new_wav_name)
            host_dir = 'audio_wav/host' + str(len_speaker + 1)
            # exist_ok: plain mkdir crashed when the directory already existed.
            os.makedirs(host_dir, exist_ok=True)
            shutil.move(new_wav_name, host_dir)
        else:
            shutil.move(self.wav_name, 'audio_wav/' + self.pre_speaker)

    def deal_face_file(self):
        """Archive the WAV under the face-recognition name: rename it to
        '<name><timestamp>.wav' and move it into audio_wav/<name>/,
        creating that folder if needed.
        """
        if self.pre_speaker == 'Nobody':
            new_wav_name = self.face_name + self.wav_name.split('_')[1]  # name + timestamp
            os.rename(self.wav_name, new_wav_name)
            target_dir = os.path.join('audio_wav', self.face_name)
            os.makedirs(target_dir, exist_ok=True)
            shutil.move(new_wav_name, target_dir)
        elif self.pre_speaker == self.face_name:
            new_wav_name = self.face_name + self.wav_name.split('_')[1]  # name + timestamp
            # BUG FIX: the file was never renamed before the move here, so
            # shutil.move always failed with FileNotFoundError.
            os.rename(self.wav_name, new_wav_name)
            shutil.move(new_wav_name, os.path.join('audio_wav', self.face_name))

    def save_data(self):
        """Spin at 100 Hz for 500 ticks (~5 s), then dump the buffer to a
        timestamped WAV, identify the speaker and archive the file.
        """
        rate = rospy.Rate(100)
        count = 0
        while not rospy.is_shutdown():
            count += 1
            if count > 500:
                # uint16 keeps the raw little-endian bit pattern that
                # record_to_file writes straight into the 16-bit WAV stream.
                self.aa = np.array(self.audio_data, dtype=np.uint16)
                self.wav_name = 'outaudio_' + str(int(time.time())) + '.wav'
                self.record_to_file(self.aa, self.wav_name)
                self.predict()
                self.deal_file()
                #self.deal_face_file()
                break
            rate.sleep()



def main():
    """Run one capture/recognition cycle.

    Initialises the ROS node and constructs a MyAudio instance; the
    constructor blocks until a single utterance has been recorded,
    classified and archived.
    """
    rospy.init_node("node_name", anonymous=True)
    MyAudio()
    print('End')


if __name__ == '__main__':
    # Loop forever: each main() call handles a single ~5 s utterance.
    while True:
        main()

